Pacages for the script

Results

Setup

functions

Mean function for ggplot2 plots - the function adds a label with the column mean.

meanFunction <- function(x){
  return(data.frame(y=round(mean(x),2),label=round(mean(x,na.rm=T),2)))}

themes to use with ggplot2

themes_ggplot <- theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  font("xlab", size = 12, color = "black", face = "bold") +
  font("ylab", size = 12, color = "black", face = "bold") +
  font("title", size = 14, color = "black", face = "bold")+
  font("legendtitle", size = 12, color = "black", face = "bold")+
  font("legendtext", size = 12, color = "black")+
  font("xy.text", size = 12, color = "black")+
  font("ylab", size = 12, color = "black", face = "bold") +
  font("xlab", size = 12, color = "black", face = "bold") +
  theme(strip.text.x = element_text( size = 12, color = "black", face = "bold"))+
  theme(strip.text.y = element_text( size = 12, color = "black", face = "bold"))+
  theme(legend.position='top')

general parameters

The data were downloded from SRA by SRAtoolkit. The information about each sample were obtained from the dbGAP database.

sra_table_path <- "raw/Cornell/SraRunTable.txt"
dbgap_data_path <- "raw/Cornell/combined_sample_subject_attribute.csv"

#read and combine information table together
dbgap_table <- read.csv(file= dbgap_data_path, stringsAsFactors=FALSE)
sra_table_part <- read.csv(file= sra_table_path, stringsAsFactors=FALSE)
sra_table <- left_join(dbgap_table,sra_table_part,by = c("BioSample.Accession"="BioSample"))

#take only required columns
vector_sra_colmns <- c("RT_PCR","VIRAL_LOAD")
sra_table <- dplyr::select(sra_table,c("Run",vector_sra_colmns))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(vector_sra_colmns)` instead of `vector_sra_colmns` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
#Create a new column of controls and COVID-19 patients, based on viral load values.
sra_table$Covid_Load <- NA
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "None"] <- "Control" 
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "Low"] <- "COVID-19" 
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "Medium"] <- "COVID-19" 
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "High"] <- "COVID-19" 
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "OtherViralInfection"] <- "Other Viruses" 

#orgenize the values in each column
sra_table$RT_PCR <- factor(sra_table$RT_PCR,levels = c("NotDetected","Detected"))
sra_table$VIRAL_LOAD <- factor(sra_table$VIRAL_LOAD ,levels = c("None","Low", 
                                                                "Medium", "High", 
                                                                "OtherViralInfection"))
sra_table$Covid_Load <- factor(sra_table$Covid_Load ,levels = c("Control","Other Viruses", "COVID-19"))

Analysis tables

SALMON

read ADAR expression levels.

salmon_path <- "raw/Cornell/SalmonTPM_ADAR.wideGenes.csv"
df_salmon <-  read.csv(file=salmon_path,header=T, stringsAsFactors=FALSE)
# sra_table$Run[!sra_table$Run %in% df_salmon$Sample] # detect failed samples
# length(df_salmon$Sample) == length(unique(df_salmon$Sample))
# detect samples with no values at all.
# df_salmon[ c(df_salmon$ADARB1 == 0 & df_salmon$ADARB2 == 0 & 
#                df_salmon$ADAR == 0 & df_salmon$ADARB2.AS1 == 0 ),]
# df_salmon$Sample[ c(df_salmon$ADARB1 == 0 &  df_salmon$ADARB2 == 0 & 
#                       df_salmon$ADAR == 0 & df_salmon$ADARB2.AS1 == 0 )]

#read and combine ISG score.
df_ISG <- read.csv("raw/Cornell/ArticleCalcLog2_ISG_madz_scores.csv")
df_salmon <- left_join(df_salmon, df_ISG, by="Sample")

STAR

STAR_stats <- read.csv("raw/Cornell/STAR_stats.csv", check.names = F)
# length(STAR_stats$Sample)
# length(unique(STAR_stats$Sample))
#remove duplicated lines and samples that did not map at all.
STAR_stats_removedDup <- unique(STAR_stats)
STAR_stats_removedDup <- STAR_stats_removedDup[STAR_stats_removedDup$`Uniquely mapped reads number`>0,]

Alu-index

aluIndex_table_path <-"raw/Cornell/ALuIndexEditingIndex.csv"
alu_table_index = read.csv(file=aluIndex_table_path,header=T, stringsAsFactors=FALSE)

alu_table_index$Sample <- gsub(".", "", alu_table_index$Sample,fixed = TRUE) #fix sample name if needed.
names(alu_table_index)[names(alu_table_index) == "Sample"] <- "Run" 
#select requird columns
alu_table_index_combined <- dplyr::select(alu_table_index,"Run",
                                    "A2GEditingIndex","A2TEditingIndex",
                                    "C2AEditingIndex","C2TEditingIndex",
                                    "C2GEditingIndex","A2CEditingIndex")

# filter the samples from failed samples and duplicated samples.
alu_table_index_combined <- unique(alu_table_index_combined)
alu_table_index_above0 <- alu_table_index_combined[!
                  c(alu_table_index_combined$A2GEditingIndex == 0 & 
                      alu_table_index_combined$A2TEditingIndex == 0 &
                      alu_table_index_combined$C2AEditingIndex == 0 & 
                      alu_table_index_combined$C2TEditingIndex == 0 &
                      alu_table_index_combined$C2GEditingIndex == 0 & 
                      alu_table_index_combined$A2CEditingIndex ==0),]
alu_table_index_combined <- alu_table_index_above0

#change the column names
names(alu_table_index_combined) <- gsub("EditingIndex","EditingIndex_Alu",
                                            names(alu_table_index_combined))

3UTR Alu-index

alu3UTR_table_path <- "raw/Cornell/EditingIndex_sammary_3UTR.csv"
alu3UTR_table_index = read.csv(file=alu3UTR_table_path,header=T, stringsAsFactors=FALSE)
alu3UTR_table_index$Sample <- gsub(".", "", alu3UTR_table_index$Sample,fixed = TRUE) #fix sample name if needed.

names(alu3UTR_table_index)[names(alu3UTR_table_index) == "Sample"] <- "Run" 
alu3UTR_table_index_combined <- dplyr::select(alu3UTR_table_index,"Run",#vector_sra_colmns,
                                    "A2GEditingIndex","A2TEditingIndex",
                                    "C2AEditingIndex","C2TEditingIndex",
                                    "C2GEditingIndex","A2CEditingIndex")

# filter the samples from failed samples and duplicated samples.
alu3UTR_table_index_combined <- unique(alu3UTR_table_index_combined)
alu3UTR_table_index_combined <- alu3UTR_table_index_combined[!
                  c(alu3UTR_table_index_combined$A2GEditingIndex == 0 & 
                      alu3UTR_table_index_combined$A2TEditingIndex == 0 &
                      alu3UTR_table_index_combined$C2AEditingIndex == 0 & 
                      alu3UTR_table_index_combined$C2TEditingIndex == 0 &
                      alu3UTR_table_index_combined$C2GEditingIndex == 0 & 
                      alu3UTR_table_index_combined$A2CEditingIndex ==0),]


#change the column names
names(alu3UTR_table_index_combined) <- gsub("EditingIndex","EditingIndex_3UTR",
                                            names(alu3UTR_table_index_combined))

Verify failed analsys

sra_table[!sra_table$Run %in% df_salmon$Sample,"Covid_Load"]
## [1] Control  Control  COVID-19 Control 
## Levels: Control Other Viruses COVID-19
sra_table[!sra_table$Run %in% df_ISG$Sample,"Covid_Load"]
## [1] Control  Control  COVID-19 Control 
## Levels: Control Other Viruses COVID-19
sra_table[!sra_table$Run %in% STAR_stats_removedDup$Sample,"Covid_Load"]
## [1] Control  Control  COVID-19 Control 
## Levels: Control Other Viruses COVID-19
table(sra_table[!sra_table$Run %in% alu_table_index_combined$Run,"Covid_Load"])
## 
##       Control Other Viruses      COVID-19 
##            14             1             2
table(left_join(STAR_stats_removedDup,sra_table,by=c("Sample"="Run"))[!STAR_stats_removedDup$Sample %in% alu_table_index_combined$Run,"Covid_Load"])
## 
##       Control Other Viruses      COVID-19 
##            11             1             1
# sra_table[!sra_table$Run %in% df_salmon$Sample,c("Run","Covid_Load")]
# sra_table[!sra_table$Run %in% df_ISG$Sample,c("Run","Covid_Load")]
# sra_table[!sra_table$Run %in% STAR_stats_removedDup$Sample,c("Run","Covid_Load")]
# sra_table[!sra_table$Run %in% alu_table_index_combined$Run,c("Run","Covid_Load")]
table(sra_table[!sra_table$Run %in% alu_table_index_combined$Run,c("Covid_Load")])
## 
##       Control Other Viruses      COVID-19 
##            14             1             2
table(left_join(STAR_stats_removedDup,sra_table,by=c("Sample"="Run"))[!STAR_stats_removedDup$Sample %in% alu_table_index_combined$Run,"Covid_Load"])
## 
##       Control Other Viruses      COVID-19 
##            11             1             1

combine the plots

OUT_DIR = "results/Cornell/"
dir.create(OUT_DIR)
## Warning in dir.create(OUT_DIR): 'results/Cornell' already exists
#combine all tables to one table
df_combined <- left_join(alu_table_index_combined,sra_table ,by="Run")
df_combined <- left_join(df_combined,alu3UTR_table_index_combined ,by="Run")
df_combined <- left_join(df_combined,df_salmon,by=c("Run"="Sample"))
df_combined <- left_join(df_combined,STAR_stats_removedDup,by=c("Run"="Sample"))

#verify that there no duplicated samples
length(unique(df_combined$Run)) == length(df_combined$Run)
## [1] TRUE
out_path <- paste0(OUT_DIR,"/combinedResults_MainResults.csv")
write.csv(df_combined, out_path, row.names = F, quote = F)


df_combined %>% 
  filter(Covid_Load != "Other Viruses" ) %>%
  mutate(Sample = Run) %>%
  dplyr::select(Sample, A2GEditingIndex_Alu) %>% write.csv(paste0(OUT_DIR,"/AEI_sample.csv"), 
                                    row.names = F, quote = F)

Plots

OUT_DIR = "results/Cornell/"
dir.create(OUT_DIR)
## Warning in dir.create(OUT_DIR): 'results/Cornell' already exists

General statistics

Generally summarize the results.

df_combined_Stats <- df_combined %>% 
  group_by(Covid_Load) %>% #nrow()
  summarize(ADAR = paste(round(mean(ADAR),2), "(",round(min(ADAR),2),", ", 
                         round(max(ADAR),2),", ",round(median(ADAR),2),", +-",
                         round(sd(ADAR),2),")"),
            ADARB1 = paste(round(mean(ADARB1),2), "(",round(min(ADARB1),2),", ", 
                           round(max(ADARB1),2),", ",round(median(ADARB1),2),", +-",
                           round(sd(ADARB1),2),")"),
            ADARB2 = paste(round(mean(ADARB2),2), "(",round(min(ADARB2),2),", ", 
                           round(max(ADARB2),2),", ",round(median(ADARB2),2),", +-",
                           round(sd(ADARB2),2),")"),
            ADARB2.AS1 = paste(round(mean(ADARB2.AS1),2), "(",round(min(ADARB2.AS1),2),", ", 
                               round(max(ADARB2.AS1),2),", ",round(median(ADARB2.AS1),2),", +-",
                               round(sd(ADARB2.AS1),2),")"),
            ISG38_score = paste(round(mean(ISG38_score),2), "(",round(min(ISG38_score),2),", ", 
                                round(max(ISG38_score),2),", ",round(median(ISG38_score),2), ", +-",
                                round(sd(ISG38_score),2),")"),
           A2GEditingIndex_Alu = paste(round(mean(A2GEditingIndex_Alu),2), "(",
                                       round(min(A2GEditingIndex_Alu),2),", ", 
                                    round(max(A2GEditingIndex_Alu),2),", ",
                                    round(median(A2GEditingIndex_Alu),2), ", +-", 
                                    round(sd(A2GEditingIndex_Alu),2),")"),
            A2GEditingIndex_3UTR = paste(round(mean(A2GEditingIndex_3UTR),2), "(",
                                         round(min(A2GEditingIndex_3UTR),2),", ", 
                                      round(max(A2GEditingIndex_3UTR),2),", ",
                                      round(median(A2GEditingIndex_3UTR),2),", +-", 
                                      round(sd(A2GEditingIndex_3UTR),2),")")
            ) 
write.csv(df_combined_Stats, paste0(OUT_DIR,"/combinedResults_mean_min_max_median_sd.csv"), 
          row.names = F, quote = F)
df_combined %>% 
  group_by(VIRAL_LOAD) %>% 
  summarize(ADAR = paste(round(mean(ADAR),2), "(",round(min(ADAR),2),", ", 
                         round(max(ADAR),2),", ",round(median(ADAR),2),")"),
            ADARB1 = paste(round(mean(ADARB1),2), "(",round(min(ADARB1),2),", ", 
                           round(max(ADARB1),2),", ",round(median(ADARB1),2),")"),
            ADARB2 = paste(round(mean(ADARB2),2), "(",round(min(ADARB2),2),", ", 
                           round(max(ADARB2),2),", ",round(median(ADARB2),2),")"),
            ADARB2.AS1 = paste(round(mean(ADARB2.AS1),2), "(",round(min(ADARB2.AS1),2),", ", 
                               round(max(ADARB2.AS1),2),", ",round(median(ADARB2.AS1),2),")"),
            ISG38_score = paste(round(mean(ISG38_score),2), "(",round(min(ISG38_score),2),", ", 
                                round(max(ISG38_score),2),", ",round(median(ISG38_score),2),")"),
           A2GEditingIndex_Alu = paste(round(mean(A2GEditingIndex_Alu),2), "(",
                                       round(min(A2GEditingIndex_Alu),2),", ", 
                                    round(max(A2GEditingIndex_Alu),2),", ",
                                    round(median(A2GEditingIndex_Alu),2),")"),
            A2GEditingIndex_3UTR = paste(round(mean(A2GEditingIndex_3UTR),2), "(",
                                         round(min(A2GEditingIndex_3UTR),2),", ", 
                                      round(max(A2GEditingIndex_3UTR),2),", ",
                                      round(median(A2GEditingIndex_3UTR),2),")")
            )
## # A tibble: 5 × 8
##   VIRAL_LOAD         ADAR  ADARB1 ADARB2 ADARB2.AS1 ISG38_score A2GEditingIndex…
##   <fct>              <chr> <chr>  <chr>  <chr>      <chr>       <chr>           
## 1 None               90.4… 3.52 … 1.65 … 0.01 ( 0 … -0.39 ( -4… 1.62 ( 0.09 ,  …
## 2 Low                131.… 6.54 … 1.96 … 0 ( 0 ,  … 0.04 ( -4.… 1.67 ( 0.18 ,  …
## 3 Medium             176.… 5.14 … 1.13 … 0 ( 0 ,  … 0.44 ( -1.… 1.83 ( 0.63 ,  …
## 4 High               201.… 4.11 … 1.33 … 0.01 ( 0 … 0.64 ( -0.… 1.91 ( 0.47 ,  …
## 5 OtherViralInfecti… 152.… 5.1 (… 1.17 … 0.02 ( 0 … 0.28 ( -0.… 1.73 ( 0.45 ,  …
## # … with 1 more variable: A2GEditingIndex_3UTR <chr>

Salmon

y_position = max(df_combined$ADAR)

c <- df_combined %>% 
  filter(Covid_Load != "Other Viruses") %>%
  compare_means(ADAR~Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1), labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"

c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")

df_c <- as.data.frame(c)

adar1 <- df_combined %>%
  filter(Covid_Load != "Other Viruses") %>%
  ggplot( 
               aes(y=ADAR, x=Covid_Load))+
  geom_boxplot(aes(fill=Covid_Load)) +  geom_jitter( size=2, alpha = 0.2)+
  facet_grid(  ~ "ADAR1" ,  drop = TRUE, scales = "free", space = "free",)+
  themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue", "red"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar1

out_path <- paste0(OUT_DIR,'/',"SALMON_ADAR1_CovidComb.pdf")
ggsave(out_path, adar1, width = 10, height = 6)
y_position = max(df_combined$ADARB1)

c <- df_combined %>% 
  filter(Covid_Load != "Other Viruses") %>%
  compare_means(ADARB1~Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1), labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns" #c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"

c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")

df_c <- as.data.frame(c)

adar2 <- 
  df_combined %>%   filter(Covid_Load != "Other Viruses") %>%
  ggplot( 
               aes(y=ADARB1, x=Covid_Load))+
  geom_boxplot(aes(fill=Covid_Load)) +  geom_jitter( size=2, alpha = 0.2)+
  facet_grid(  ~ "ADAR2" ,  drop = TRUE, scales = "free", space = "free",)+
  themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue", "red"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar2

out_path <- paste0(OUT_DIR,'/',"SALMON_ADAR2_CovidComb.pdf")
ggsave(out_path, adar2, width = 10, height = 6)
y_position = max(df_combined$ADARB2)

c <- df_combined %>% 
  filter(Covid_Load != "Other Viruses") %>%
  compare_means(ADARB2~Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1), labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"#c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"

c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")


df_c <- as.data.frame(c)

adar3 <-
  df_combined %>%
  filter(Covid_Load != "Other Viruses") %>%
  ggplot(
               aes(y=ADARB2, x=Covid_Load))+
  geom_boxplot(aes(fill=Covid_Load)) +  geom_jitter( size=2, alpha = 0.2)+
  facet_grid(  ~ "ADAR3" ,  drop = TRUE, scales = "free", space = "free",)+
  themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue", "red"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar3

out_path <- paste0(OUT_DIR,'/',"SALMON_ADAR3_CovidComb.pdf")
ggsave(out_path, adar3, width = 10, height = 6)
plot_grid(adar1, adar2, adar3, labels = c('', '',''), label_size = 12, ncol=3)

out_path <- paste0(OUT_DIR,'/',"SALMON_all_adars_CovidComb.pdf")
ggsave(out_path, width = 14, height = 6)
y_position = max(df_combined$ADAR)

c <- df_combined %>% 
  # filter(Gene_change == "ADAR1") %>%
  compare_means(ADAR~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2, y_position *1.3, y_position*1.4,y_position *1.5,
                        y_position *1.6, y_position*1.7,y_position *1.8, y_position *1.9), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
# c$p.adj.format.stars[c$p.adj < 0.1] <- c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"


c$p.adj.format.stars <- paste0(c$p.adj.format.stars," (p=",c$p.adj,")")
c$p.adj.format.stars[grep("ns",c$p.adj.format.stars) ] <- "ns"

df_c <- as.data.frame(c)

adar1_viralLoad <- ggplot(data=df_combined, 
               aes(y=ADAR, x=VIRAL_LOAD))+
  geom_boxplot(aes(fill=VIRAL_LOAD)) +  geom_jitter( size=2, alpha = 0.2)+
  facet_grid(  ~ "ADAR1" ,  drop = TRUE, scales = "free", space = "free",)+
  themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow", "orange","red","purple"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar1_viralLoad

out_path <- paste0(OUT_DIR,'/',"ViralLoad_SALMON_ADAR1.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
y_position = max(df_combined$ADAR)

c <- df_combined %>% 
  filter(Covid_Load == "COVID-19") %>%
  compare_means(ADAR~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars <- paste0(c$p.adj.format.stars," (p=",c$p.adj,")")
c$p.adj.format.stars[grep("ns",c$p.adj.format.stars) ] <- "ns"

df_c <- as.data.frame(c)

adar1_viralLoad <- 
  df_combined %>%
  filter(Covid_Load == "COVID-19") %>%
  ggplot(aes(y=ADAR, x=VIRAL_LOAD))+
  geom_boxplot(aes(fill=VIRAL_LOAD)) +  geom_jitter( size=2, alpha = 0.2)+
  facet_grid(  ~ "ADAR1" ,  drop = TRUE, scales = "free", space = "free",)+
  themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("yellow", "orange","red"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar1_viralLoad

out_path <- paste0(OUT_DIR,'/',"ViralLoad_SALMON_ADAR1_onlyCOVID.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)

ISG

y_position = max(df_combined$ISG38_score)

c <- df_combined %>% 
  filter(Covid_Load != "Other Viruses") %>%
  compare_means(ISG38_score~Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"#c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"

c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")


df_c <- as.data.frame(c)

ISG38 <- 
  df_combined%>%
  filter(Covid_Load != "Other Viruses") %>%
  ggplot(
               aes(y=ISG38_score, x=Covid_Load))+
  geom_boxplot(aes(fill=Covid_Load)) +  geom_jitter( size=2, alpha = 0.2)+
  themes_ggplot+ ylab("Interferon-Stimulated Genes Score")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue","red"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  ISG38

out_path <- paste0(OUT_DIR,'/',"ISG38_CovidComb.pdf")
ggsave(out_path, ISG38, width = 10, height = 6)
y_position = max(df_combined$ISG38_score)

c <- df_combined %>% 
  # filter(Gene_change == "ADAR1") %>%
  compare_means(ISG38_score ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2, y_position *1.3, y_position*1.4,y_position *1.5,
                        y_position *1.6, y_position*1.7,y_position *1.8, y_position *1.9), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"


df_c <- as.data.frame(c)

adar1_viralLoad <- ggplot(data=df_combined, 
               aes(y=ISG38_score, x=VIRAL_LOAD))+
  geom_boxplot(aes(fill=VIRAL_LOAD)) +  geom_jitter( size=2, alpha = 0.2)+
  # facet_grid(  ~ "ADAR1" ,  drop = TRUE, scales = "free", space = "free",)+
  themes_ggplot+ ylab("Interferon-Stimulated Genes Score")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow", "orange","red","purple"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar1_viralLoad

out_path <- paste0(OUT_DIR,'/',"ViralLoad_ISG38_score.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)

Index - AluEditingIndex

 edit_type_names <- c("A2GEditingIndex_Alu" = "A-G (T-C)", "A2TEditingIndex_Alu" = "A-T (T-A)", 
                      "C2AEditingIndex_Alu" = "C-A (G-T)", "C2TEditingIndex_Alu" = "C-T (G-A)", 
                      "C2GEditingIndex_Alu" = "C-G (G-C)",  "A2CEditingIndex_Alu" = "A-C (T-G)")

AG_clean <-df_combined %>%
  dplyr::select("Run", "A2GEditingIndex_Alu","A2TEditingIndex_Alu",
         "C2AEditingIndex_Alu","C2TEditingIndex_Alu","C2GEditingIndex_Alu","A2CEditingIndex_Alu") %>%
  gather(edit_type, value, -Run) %>%
 ggplot( 
               aes(y=value, x=edit_type ,fill=edit_type))+
  geom_boxplot(aes(fill=edit_type)) +  
  themes_ggplot+
  # theme(axis.text.x = element_text(angle = 90))+
  ylab("ALU Editing Index")+ 
  xlab ("Nucleotide mismatch")+
  scale_fill_discrete(name = "Mistmatch:")+
  theme(axis.title.x = element_blank())+
  theme(legend.position = "none") +
  scale_fill_manual(values=c("yellow", "red", "blue", "green", "orange", "black"))+
  scale_x_discrete(labels= edit_type_names)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
AG_clean

out_path <- paste0(OUT_DIR,'/',"AluEditing_cleanSignal.pdf")
ggsave(out_path, AG_clean, width = 10, height = 6)
y_position = max(df_combined$A2GEditingIndex_Alu)

df_combined %>% 
  mutate(AG = A2GEditingIndex_Alu) %>% 
  compare_means(AG ~ Covid_Load, data = .)
## # A tibble: 3 × 8
##   .y.   group1        group2                 p   p.adj p.format p.signif method 
##   <chr> <chr>         <chr>              <dbl>   <dbl> <chr>    <chr>    <chr>  
## 1 AG    Control       Other Viruses 0.126      0.13    0.126    ns       Wilcox…
## 2 AG    Control       COVID-19      0.00000672 0.00002 6.7e-06  ****     Wilcox…
## 3 AG    Other Viruses COVID-19      0.0563     0.11    0.056    ns       Wilcox…
c <- df_combined %>% 
  mutate(AG = A2GEditingIndex_Alu) %>% 
  filter(Covid_Load != "Other Viruses") %>%
  compare_means(AG ~ Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns" #c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"


c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")


df_c <- as.data.frame(c)

AluEditingIndex <- df_combined %>%
  filter(Covid_Load != "Other Viruses") %>% 
  ggplot(
               aes(y=A2GEditingIndex_Alu, x=Covid_Load))+
  geom_boxplot(aes(fill=Covid_Load)) +  geom_jitter( size=2, alpha = 0.2)+
  themes_ggplot+ ylab("ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue","red"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  AluEditingIndex

out_path <- paste0(OUT_DIR,'/',"AluEditing_AG.pdf")
ggsave(out_path, AluEditingIndex, width = 10, height = 6)
y_position = max(df_combined$A2GEditingIndex_Alu)

c <- df_combined %>% 
  # filter(Gene_change == "ADAR1") %>%
  mutate(AG = A2GEditingIndex_Alu) %>%
  compare_means(AG ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2, y_position *1.3, y_position*1.4,y_position *1.5,
                        y_position *1.6, y_position*1.7,y_position *1.8, y_position *1.9), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"


df_c <- as.data.frame(c)

adar1_viralLoad <- ggplot(data=df_combined, 
               aes(y=A2GEditingIndex_Alu, x=VIRAL_LOAD))+
  geom_boxplot(aes(fill=VIRAL_LOAD)) +  geom_jitter( size=2, alpha = 0.2)+
  # facet_grid(  ~ "ADAR1" ,  drop = TRUE, scales = "free", space = "free",)+
  themes_ggplot+ ylab("ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow", "orange","red","purple"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar1_viralLoad

out_path <- paste0(OUT_DIR,'/',"ViralLoad_Alu_Index.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
y_position = max(df_combined$A2GEditingIndex_Alu)

c <- df_combined %>% 
  filter(Covid_Load == "COVID-19") %>%
  mutate(AG = A2GEditingIndex_Alu) %>%
  compare_means(AG ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"


df_c <- as.data.frame(c)

adar1_viralLoad <- 
  df_combined %>%
  filter(Covid_Load == "COVID-19") %>%
  ggplot(aes(y=A2GEditingIndex_Alu, x=VIRAL_LOAD))+
  geom_boxplot(aes(fill=VIRAL_LOAD)) +  geom_jitter( size=2, alpha = 0.2)+
  # facet_grid(  ~ "ADAR1" ,  drop = TRUE, scales = "free", space = "free",)+
  themes_ggplot+ ylab("ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("yellow", "orange","red"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar1_viralLoad

out_path <- paste0(OUT_DIR,'/',"ViralLoad_NoControl_index.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)

Index - 3’UTEditingIndex

edit_type_names <- c("A2GEditingIndex_3UTR" = "A-G (T-C)", "A2TEditingIndex_3UTR" = "A-T (T-A)", 
                      "C2AEditingIndex_3UTR" = "C-A (G-T)", "C2TEditingIndex_3UTR" = "C-T (G-A)", 
                      "C2GEditingIndex_3UTR" = "C-G (G-C)",  "A2CEditingIndex_3UTR" = "A-C (T-G)")

AG_3UTR_clean <-df_combined %>%
  dplyr::select("Run", "A2GEditingIndex_3UTR","A2TEditingIndex_3UTR",
         "C2AEditingIndex_3UTR","C2TEditingIndex_3UTR","C2GEditingIndex_3UTR","A2CEditingIndex_3UTR") %>%
  gather(edit_type, value, -Run) %>%
ggplot( 
               aes(y=value, x=edit_type ,fill=edit_type))+
  geom_boxplot(aes(fill=edit_type)) +  
  themes_ggplot+
  ylab("3'UTR ALU Editing Index")+ 
  xlab ("Nucleotide mismatch")+
  scale_fill_discrete(name = "Mistmatch:")+
  theme(axis.title.x = element_blank())+
  theme(legend.position = "none") +
  scale_fill_manual(values=c("yellow", "red", "blue", "green", "orange", "black"))+
  scale_x_discrete(labels= edit_type_names)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
AG_3UTR_clean

out_path <- paste0(OUT_DIR,'/',"3UTREditing_cleanSignal.pdf")
ggsave(out_path, AG_3UTR_clean, width = 10, height = 6)
y_position = max(df_combined$A2GEditingIndex_3UTR)

df_combined %>% 
  mutate(AG = A2GEditingIndex_3UTR) %>% 
  compare_means(AG ~ Covid_Load, data = .)
## # A tibble: 3 × 8
##   .y.   group1        group2               p   p.adj p.format p.signif method  
##   <chr> <chr>         <chr>            <dbl>   <dbl> <chr>    <chr>    <chr>   
## 1 AG    Control       Other Viruses 2.43e- 4 2.4e- 4 0.00024  ***      Wilcoxon
## 2 AG    Control       COVID-19      7.03e-33 2.1e-32 < 2e-16  ****     Wilcoxon
## 3 AG    Other Viruses COVID-19      8.39e- 8 1.7e- 7 8.4e-08  ****     Wilcoxon
c <- df_combined %>% 
  mutate(AG = A2GEditingIndex_3UTR) %>% 
  compare_means(AG ~ Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1,y_position *1.1,y_position *1.2), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <-  "ns"  #c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"

c$p.adj.format.stars <- paste0(c$p.adj.format.stars," (p=",c$p.adj,")")
c$p.adj.format.stars[grep("ns",c$p.adj.format.stars) ] <- "ns"

df_c <- as.data.frame(c)

UTR3EditingIndex <- df_combined%>%
  # filter(Covid_Load != "Other Viruses") %>%
  ggplot( 
               aes(y=A2GEditingIndex_3UTR, x=Covid_Load))+
  geom_boxplot(aes(fill=Covid_Load)) +  geom_jitter( size=2, alpha = 0.2)+
  # facet_grid(  ~ "3'UTR Editing Index" ,  drop = TRUE, scales = "free", space = "free",)+
  themes_ggplot+ ylab("3'UTR ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow","red"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  UTR3EditingIndex

  out_path <- paste0(OUT_DIR,'/',"3UTRAluEditing_AG.pdf")
ggsave(out_path, UTR3EditingIndex, width = 10, height = 6)
y_position = max(df_combined$A2GEditingIndex_3UTR)

c <- df_combined %>% 
  mutate(AG = A2GEditingIndex_3UTR) %>%
  compare_means(AG ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2, 
                        y_position *1.3, y_position*1.4,y_position *1.5,
                        y_position *1.6, y_position*1.7,y_position *1.8, y_position *1.9), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))

c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"


df_c <- as.data.frame(c)

adar1_viralLoad <- ggplot(data=df_combined, 
               aes(y=A2GEditingIndex_3UTR, x=VIRAL_LOAD))+
  geom_boxplot(aes(fill=VIRAL_LOAD)) +  geom_jitter( size=2, alpha = 0.2)+
  themes_ggplot+ ylab("3'UTR ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow", "orange","red","purple"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar1_viralLoad

out_path <- paste0(OUT_DIR,'/',"ViralLoad_Alu_3utrIndex.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
y_position = max(df_combined$A2GEditingIndex_3UTR)

c <- df_combined %>% 
  filter(Covid_Load == "COVID-19") %>%
  mutate(AG = A2GEditingIndex_3UTR) %>%
  compare_means(AG ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2), 
              labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"

c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"

c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")

df_c <- as.data.frame(c)

adar1_viralLoad <- 
  df_combined %>%
  filter(Covid_Load == "COVID-19") %>%
  ggplot(aes(y=A2GEditingIndex_3UTR, x=VIRAL_LOAD))+
  geom_boxplot(aes(fill=VIRAL_LOAD)) +  geom_jitter( size=2, alpha = 0.2)+
  themes_ggplot+ ylab("3'UTR ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
  expand_limits(y = 0) + scale_fill_manual(values=c("yellow", "orange","red"))+
  theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
  geom_signif(data = df_c, 
              aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
  adar1_viralLoad

out_path <- paste0(OUT_DIR,'/',"ViralLoad_NoControl_3utrindex.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)

correlation

corr_3UTR_ADAR <- df_combined %>% 
ggplot(aes(y=A2GEditingIndex_3UTR, x=ADAR))+
  geom_point(aes(color=Covid_Load),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+
  stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","red"), name = "Group:") +
  ylab("3'UTR ALU Editing Index")+ xlab ("ADAR1 Normalized Expression (TPM)")

corr_3UTR_ADAR
## `geom_smooth()` using formula 'y ~ x'

out_path <- paste0(OUT_DIR,'/',"corr_3UTR_ADAR.pdf")
ggsave(out_path, corr_3UTR_ADAR, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'
corr_ISG_ADAR <- df_combined %>% 
ggplot(aes(y=ISG38_score, x=ADAR))+
  geom_point(aes(color=Covid_Load),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='loess', se=F, colour ="black")+
  stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","red"), name = "Group:") +
  ylab("Interferon-Stimulated Genes Score")+ xlab ("ADAR1 Normalized Expression (TPM)")

corr_ISG_ADAR
## `geom_smooth()` using formula 'y ~ x'

out_path <- paste0(OUT_DIR,'/',"corr_ISG_ADAR.pdf")
ggsave(out_path, corr_ISG_ADAR, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'
corr_ISG_UTR3 <- df_combined %>% 
ggplot(aes(x=A2GEditingIndex_3UTR, y=ISG38_score))+
  geom_point(aes(color=Covid_Load),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+# facet_grid(~Covid_Load)+
  stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","red"), name = "Group:") +
  xlab("Interferon-Stimulated Genes Score")+ ylab ("3'UTR ALU Editing Index")

corr_ISG_UTR3
## `geom_smooth()` using formula 'y ~ x'

out_path <- paste0(OUT_DIR,'/',"corr_corr_ISG_UTR3.pdf")
ggsave(out_path, corr_ISG_ADAR, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'
df_combined$VIRAL_LOAD_changedName <- as.character(df_combined$VIRAL_LOAD)
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "None"] <- "Contol"
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "Low"] <- "Low\nviral load"
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "Medium"] <- "Medium\nviral load"
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "High"] <- "High\nviral load"
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "OtherViralInfection"] <- "Other Viruses"


df_combined$VIRAL_LOAD_changedName <-factor(df_combined$VIRAL_LOAD_changedName, levels = c(
  "Contol", "Other Viruses", "Low\nviral load", "Medium\nviral load", "High\nviral load"
))

corr_ISG_UTR3_viral <- df_combined %>% 
ggplot(aes(x=A2GEditingIndex_3UTR, y=ISG38_score))+
  geom_point(aes(color=VIRAL_LOAD_changedName),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+# facet_grid(~Covid_Load)+
  stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow", "orange","red","purple"), name = "Group:") +
  xlab("Interferon-Stimulated Genes Score")+ ylab ("3'UTR ALU Editing Index")

corr_ISG_UTR3_viral
## `geom_smooth()` using formula 'y ~ x'

out_path <- paste0(OUT_DIR,'/',"corr_corr_ISG_UTR3_viral.pdf")
ggsave(out_path, corr_ISG_UTR3_viral, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'
df_combined$VIRAL_LOAD <- factor(df_combined$VIRAL_LOAD, levels = c("None","OtherViralInfection","Low" ,"Medium", "High" ))
corr_ISG_UTR3_viral_grid <- df_combined %>% 
ggplot(aes(x=A2GEditingIndex_3UTR, y=ISG38_score))+
  geom_point(aes(color=Covid_Load),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+# facet_grid(~Covid_Load)+
  stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","red"), name = "Group:") +
  facet_grid(~VIRAL_LOAD_changedName)+
  xlab("Interferon-Stimulated Genes Score")+ ylab ("3'UTR ALU Editing Index")

corr_ISG_UTR3_viral_grid
## `geom_smooth()` using formula 'y ~ x'

out_path <- paste0(OUT_DIR,'/',"corr_corr_ISG_UTR3_viral_grid.pdf")
ggsave(out_path, corr_ISG_UTR3_viral_grid, width = 14, height = 6)
## `geom_smooth()` using formula 'y ~ x'
corr_ISG_UTR3_viral_grid <- df_combined %>% 
  filter(Covid_Load != "Other Viruses") %>%
ggplot(aes(x=A2GEditingIndex_3UTR, y=ISG38_score))+
  geom_point(aes(color=VIRAL_LOAD_changedName),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+# facet_grid(~Covid_Load)+
  stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","orange","red"), name = "Group:") +
  facet_grid(~VIRAL_LOAD_changedName)+
  xlab("Interferon-Stimulated Genes Score")+ ylab ("3'UTR ALU Editing Index") + theme(legend.position="none")


corr_ISG_UTR3_viral_grid
## `geom_smooth()` using formula 'y ~ x'

out_path <- paste0(OUT_DIR,'/',"corr_corr_ISG_UTR3_viral_grid_onlyCOVID2.pdf")
ggsave(out_path, corr_ISG_UTR3_viral_grid, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'